Installing packages

## Install the R packages
#BiocManager::install("scRepertoire")
#install.packages('Seurat')
#BiocManager::install("scater")
#BiocManager::install("scDblFinder")
#BiocManager::install("celldex")
#BiocManager::install("SingleR")
#BiocManager::install("clusterProfiler")
#BiocManager::install("org.Hs.eg.db")
#devtools::install_github('immunogenomics/presto')  
#install.packages("hdf5r")
#remotes::install_github("10xGenomics/loupeR")
#loupeR::setup()
## Loading libraries
suppressPackageStartupMessages({
library(Seurat)
library(scDblFinder)
library(scater)
library(sctransform)
library(scRepertoire)
library(SingleCellExperiment)
library(celldex)
library(SingleR)
library(DropletUtils)
library(dplyr)
library(ggplot2)
library(data.table)
library(tidyverse)
library(loupeR)
library(cowplot)
library(clusterProfiler)
library(biomaRt)
loupeR::setup()
})

Upload seurat object

Reading in data

ATAC <- Read10X_h5("~/ATAC/pbmc_unsorted_3k_filtered_feature_bc_matrix.h5")
## Genome matrix has multiple modalities, returning a list of matrices for this genome
ATAC_object <- CreateSeuratObject(ATAC)
ATAC_object
## An object of class Seurat 
## 117757 features across 3009 samples within 1 assay 
## Active assay: RNA (117757 features, 0 variable features)
##  2 layers present: counts.Gene Expression, counts.Peaks

Finding and removing doublets

ATAC_object <- JoinLayers(ATAC_object)
atac_seq <- as.SingleCellExperiment(ATAC_object)
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Layer 'data' is empty
## Warning: Layer 'scale.data' is empty
atac_seq
## class: SingleCellExperiment 
## dim: 117757 3009 
## metadata(0):
## assays(1): counts
## rownames(117757): MIR1302-2HG FAM138A ... KI270713.1:26202-26986
##   KI270713.1:36937-37838
## rowData names(0):
## colnames(3009): AAACAGCCAACAGGTG-1 AAACATGCAACAACAA-1 ...
##   TTTGTGTTCACTTCAT-1 TTTGTGTTCATGCGTG-1
## colData names(4): orig.ident nCount_RNA nFeature_RNA ident
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## reducedDimNames(0):
## mainExpName: RNA
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## altExpNames(0):
## We need to set.seed() because the scDblFinder command
## uses a probabilist strategy to identify doublets. This
## means that, everytime we run the command, it will produce
## results that are slightly different. The set.seed()
## command will guarantee the same results everytime.
library(dplyr)
set.seed(123)
results <- scDblFinder(atac_seq, returnType = 'table') %>%
  as.data.frame() %>%
  dplyr::filter(type == 'real')
## Warning in .checkSCE(sce, coerce = is.null(samples)): Some cells in `sce` have
## an extremely low read counts; note that these could trigger errors and might
## best be filtered out
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Creating ~2408 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 428 cells excluded from training.
## iter=1, 435 cells excluded from training.
## iter=2, 441 cells excluded from training.
## Threshold found:0.408
## 149 (5%) doublets called
head(results)
##                    type   weighted distanceToNearest distanceToNearestDoublet
## AAACAGCCAACAGGTG-1 real 0.23937491          5.557017                 5.747415
## AAACATGCAACAACAA-1 real 0.42415741          4.361270                 5.495526
## AAACATGCACCTGGTG-1 real 0.00000000          3.174207                32.095882
## AAACCAACACAGCCTG-1 real 0.08263374          5.557017                 5.926677
## AAACCAACAGCAAGAT-1 real 0.20575437          6.939530                 6.939530
## AAACCAACATTGCGAC-1 real 0.09430503          4.766809                 5.374438
##                    distanceToNearestReal      nearestClass  ratio.k3 ratio.k10
## AAACAGCCAACAGGTG-1              5.557017              cell 0.3333333       0.2
## AAACATGCAACAACAA-1              4.361270              cell 0.0000000       0.4
## AAACATGCACCTGGTG-1              3.174207              cell 0.0000000       0.0
## AAACCAACACAGCCTG-1              5.557017              cell 0.3333333       0.1
## AAACCAACAGCAAGAT-1              6.996020 artificialDoublet 0.3333333       0.2
## AAACCAACATTGCGAC-1              4.766809              cell 0.0000000       0.2
##                     ratio.k15 ratio.k20 ratio.k25  ratio.k39 lsizes nfeatures
## AAACAGCCAACAGGTG-1 0.13333333      0.15      0.12 0.20512821   1123       437
## AAACATGCAACAACAA-1 0.46666667      0.55      0.56 0.46153846   2420       768
## AAACATGCACCTGGTG-1 0.00000000      0.00      0.00 0.00000000    421        65
## AAACCAACACAGCCTG-1 0.06666667      0.10      0.08 0.10256410    968       366
## AAACCAACAGCAAGAT-1 0.20000000      0.20      0.20 0.23076923    946       426
## AAACCAACATTGCGAC-1 0.13333333      0.10      0.08 0.07692308   1068       408
##                    nAbove2  src   cxds_score include.in.training        score
## AAACAGCCAACAGGTG-1      64 real 1.600112e-02                TRUE 0.0002391383
## AAACATGCAACAACAA-1     269 real 2.388606e-02                TRUE 0.0417344086
## AAACATGCACCTGGTG-1      13 real 1.714966e-10                TRUE 0.0001717108
## AAACCAACACAGCCTG-1      68 real 1.476810e-02                TRUE 0.0004766944
## AAACCAACAGCAAGAT-1      76 real 1.858442e-02                TRUE 0.0004033735
## AAACCAACATTGCGAC-1      77 real 1.208356e-02                TRUE 0.0010119085
##                      class
## AAACAGCCAACAGGTG-1 singlet
## AAACATGCAACAACAA-1 singlet
## AAACATGCACCTGGTG-1 singlet
## AAACCAACACAGCCTG-1 singlet
## AAACCAACAGCAAGAT-1 singlet
## AAACCAACATTGCGAC-1 singlet
results %>%
  dplyr::count(class)
##     class    n
## 1 doublet  149
## 2 singlet 2860
outfile = file.path('project3_doubletFile.txt')
write.table(results, outfile, sep='\t', quote=F,
            col.names=TRUE, row.names=TRUE)

keep = results %>%
  dplyr::filter(class == "singlet") %>%
  rownames()
ATAC_object = ATAC_object[, keep]
ATAC_object
## An object of class Seurat 
## 117757 features across 2860 samples within 1 assay 
## Active assay: RNA (117757 features, 0 variable features)
##  1 layer present: counts

Filter FeatureSet

## %MT reads
ATAC_object[["percentage_mtDNA"]] <- PercentageFeatureSet(ATAC_object, pattern="^MT-")

VlnPlot(ATAC_object, features = c("nFeature_RNA", "nCount_RNA", "percentage_mtDNA")) 
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

nFeature_RNA_cutoff <- 300  # Example cutoff, adjust as needed
nCount_RNA_cutoff <- c(500, 15000)    # Example cutoff, adjust as needed
percentage_mtDNA_cutoff <- 20  # Example cutoff, adjust as needed

# Create violin plots with horizontal lines
p1 <- VlnPlot(ATAC_object, features = "nFeature_RNA") + 
  geom_hline(yintercept = nFeature_RNA_cutoff, color = "red", linetype = "dashed") +
  ggtitle("nFeature_RNA") + NoLegend()
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
p2 <- VlnPlot(ATAC_object, features = "nCount_RNA") + 
  geom_hline(yintercept = nCount_RNA_cutoff, color = "red", linetype = "dashed") +
  ggtitle("nCount_RNA") + NoLegend()
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
p3 <- VlnPlot(ATAC_object, features = "percentage_mtDNA") + 
  geom_hline(yintercept = percentage_mtDNA_cutoff, color = "red", linetype = "dashed") +
  ggtitle("percentage_mtDNA") + NoLegend()
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
# Arrange plots in a grid
# p1 + p2 + p3
plot_grid(p1, p2, p3, ncol = 3) 

## Scatter plots
FeatureScatter(ATAC_object, feature1="nCount_RNA", 
                           feature2="percentage_mtDNA", pt.size = 1) +
  geom_hline(yintercept = percentage_mtDNA_cutoff, color = "black", linetype = "dashed")

FeatureScatter(ATAC_object, feature1="nCount_RNA", 
                           feature2="nFeature_RNA", pt.size = 1)

More qc plots

ATAC_object.qc <- FetchData(ATAC_object,
                        vars=c("nFeature_RNA","nCount_RNA","percentage_mtDNA"))

scPlot <- ATAC_object.qc %>%
  mutate(keep = if_else(nFeature_RNA > 300 & 
                          nCount_RNA < 15000 &
                          percentage_mtDNA < 20, "keep", "remove")) %>%
  ggplot() +
  geom_point(aes(nCount_RNA, nFeature_RNA, colour=keep), alpha=.8, size = 1) + 
  theme_classic()
scPlot

scPlot <- ATAC_object.qc %>%
  mutate(keep = if_else(nFeature_RNA > 300 & 
                          nCount_RNA < 15000 &
                          percentage_mtDNA < 20, "keep", "remove")) %>%
  ggplot() +
  geom_point(aes(nCount_RNA, nFeature_RNA, colour=keep), alpha=.8, size = 1) +
  scale_x_log10() +
  scale_y_log10() +
  theme_classic()
scPlot

Filtering

# Filtering out poor quality cells and reads
ATAC_object <- subset(ATAC_object, subset = percentage_mtDNA < 20 & nFeature_RNA > 300 & nCount_RNA < 15000)
# After filtering
VlnPlot(ATAC_object, features = c("nCount_RNA", "nFeature_RNA", "percentage_mtDNA"), 
        pt.size = 0.1, log = TRUE)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

ATAC_object
## An object of class Seurat 
## 117757 features across 2613 samples within 1 assay 
## Active assay: RNA (117757 features, 0 variable features)
##  1 layer present: counts

Variables genes

ATAC_object <- FindVariableFeatures(ATAC_object, selection.method = "vst", 
                               nfeatures = 2000)
## Finding variable features for layer counts
top10 <- VariableFeatures(ATAC_object, simplify = FALSE)
#top10$counts
head(top10$counts, 10)
##  [1] "IGLC2"     "IGKC"      "GNLY"      "TCF4"      "IGLC1"     "SOX5"     
##  [7] "LINC01478" "IGHM"      "FCGR3A"    "LINC01374"
#top10$counts

plot1 <- VariableFeaturePlot(ATAC_object, raster = FALSE)
LabelPoints(plot=plot1, points = head(top10$counts, 10), repel = TRUE) + theme(legend.position="none")
## When using repel, set xnudge and ynudge to 0 for optimal results
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

ggsave("~/ATAC/variable_genes.png", dpi = 800)
## Saving 7 x 5 in image
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

##Normalize

#ATAC_object <- SCTransform(ATAC_object)

ATAC_object <- NormalizeData(ATAC_object)
## Normalizing layer: counts
ATAC_object <- ScaleData(ATAC_object)
## Centering and scaling data matrix
ATAC_object = FindVariableFeatures(ATAC_object, selection.method="vst",
                               nfeatures=2000)
## Finding variable features for layer counts
top10 <- VariableFeatures(ATAC_object, simplify = F)
head(top10$counts, 10)
##  [1] "IGLC2"     "IGKC"      "GNLY"      "TCF4"      "IGLC1"     "SOX5"     
##  [7] "LINC01478" "IGHM"      "FCGR3A"    "LINC01374"
plot2 <- VariableFeaturePlot(ATAC_object, raster = FALSE)
LabelPoints(plot=plot2, points = head(top10$counts, 10), repel=T, 
            xnudge=-1, ynudge=0) + theme(legend.position="none")
## When using repel, set xnudge and ynudge to 0 for optimal results
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

##PCA & UMAP

ATAC_object <- RunPCA(ATAC_object, features = VariableFeatures(ATAC_object))
## PC_ 1 
## Positive:  PLXDC2, SLC8A1, LRMDA, FCN1, JAK2, TYMP, IRAK3, NAMPT, TBXAS1, RBM47 
##     VCAN, DMXL2, SAT1, MCTP1, ZEB2, CYBB, TNFAIP2, LRRK2, CLEC7A, LYN 
##     CD36, CSF3R, LYZ, LYST, TLR2, GAB2, CTSS, HCK, STX11, AC020916.1 
## Negative:  BCL11B, CD247, CAMK4, BACH2, BCL2, TC2N, LEF1, IL32, INPP4B, SYNE2 
##     IL7R, THEMIS, CD96, TRAC, LTB, ANK3, RORA, TXK, GRAP2, TRBC2 
##     NR3C2, CCR7, APBA2, CD69, TRBC1, MLLT3, ITGA6, LINC01934, NELL2, RASGRF2 
## PC_ 2 
## Positive:  BCL11B, CD247, IL32, CAMK4, TC2N, INPP4B, THEMIS, IL7R, LEF1, TRAC 
##     PDE3B, TXK, RORA, AOAH, DPYD, FNDC3B, ARHGAP26, APBA2, LINC01934, SAMD3 
##     GRAP2, CTSW, TRBC2, TRBC1, SYNE2, NCALD, ANXA1, RASGRF2, MLLT3, SRGN 
## Negative:  BANK1, MS4A1, PAX5, AFF3, FCRL1, IGHM, EBF1, NIBAN3, OSBPL10, LINC00926 
##     RALGPS2, CD79A, BLK, ADAM28, AP002075.1, CD22, COL19A1, IGHD, IGKC, BCL11A 
##     BLNK, KHDRBS2, PLEKHG1, COBLL1, WDFY4, GNG7, CD79B, AC120193.1, HLA-DQA1, CD74 
## PC_ 3 
## Positive:  GNLY, NKG7, PRF1, KLRD1, GZMA, GZMB, CCL5, CST7, SPON2, FGFBP2 
##     GZMH, ADGRG1, PDGFD, TGFBR3, KLRF1, CCL4, PPP2R2B, C1orf21, CTSW, BNC2 
##     PTGDR, A2M, PYHIN1, CX3CR1, IL2RB, CLIC3, FCGR3A, NCR1, CD160, SAMD3 
## Negative:  LEF1, BACH2, CAMK4, PDE3B, CCR7, IL7R, FHIT, INPP4B, NELL2, ANK3 
##     NR3C2, RASGRF2, PLCL1, LTB, MAL, PRKN, CSGALNACT1, ITGA6, AC139720.1, BCL2 
##     BCL11B, MLLT3, THEMIS, SLC2A3, TSHZ2, PATJ, AL589693.1, SLC16A10, PCAT1, CA6 
## PC_ 4 
## Positive:  PAX5, MS4A1, FCRL1, GNLY, MCTP2, NKG7, LINC00926, EBF1, COL19A1, OSBPL10 
##     PRF1, IGHD, CD79A, MTSS1, BANK1, KLRD1, IGHM, CCL5, CD79B, GZMA 
##     PLEKHG1, FCER2, CST7, CD22, LARGE1, FGFBP2, ADAM28, AC120193.1, AP002075.1, GZMH 
## Negative:  EPHB1, CLEC4C, CUX2, FAM160A1, COL24A1, AC023590.1, NRP1, LINC00996, LINC01478, LILRA4 
##     LINC01374, COL26A1, PTPRS, PLXNA4, PACSIN1, SCN9A, ITM2C, TNFRSF21, SLC35F3, P3H2 
##     RHEX, P2RY6, SLC7A11, AC007381.1, PLD4, ZFAT, PHEX, SCN1A-AS1, CCDC50, KCNK10 
## PC_ 5 
## Positive:  PLCB1, VCAN, VCAN-AS1, ARHGAP24, DYSF, GLT1D1, AC020916.1, PADI4, FNDC3B, CSF3R 
##     CREB5, LINC00937, ARHGAP26, ACSL1, MEGF9, TREM1, IRS2, SLC2A3, JUN, MCTP2 
##     CLMN, CRISPLD2, MIR646HG, PDE4D, LIN7A, RAB11FIP1, GNLY, ANXA1, CR1, NLRP12 
## Negative:  CDKN1C, IFITM3, FCGR3A, CALHM6, CSF1R, AIF1, SMIM25, CST3, HLA-DPA1, LST1 
##     SERPINA1, MS4A7, TCF7L2, CFD, AC104809.2, CCDC26, HMOX1, CTSL, MTSS1, FGL2 
##     IFI30, LRRC25, PAPSS2, COTL1, SIGLEC10, FMNL2, AC020651.2, CD68, LILRB1, HLA-DPB1
scPlot <- DimPlot(ATAC_object, reduction="pca", pt.size = 1)
scPlot

scPlot <- ElbowPlot(ATAC_object)
scPlot

scPlot_elbow <- ElbowPlot(ATAC_object, ndims=50)
scPlot_elbow

pca = ATAC_object[["pca"]]

## get the eigenvalues
evs = pca@stdev^2
total.var = pca@misc$total.variance
varExplained = evs/total.var
pca.data = data.frame(PC=factor(1:length(evs)),
                      percVar=varExplained*100)
pca.data$cumulVar = cumsum(pca.data$percVar)

head(pca.data, 20)
##    PC   percVar  cumulVar
## 1   1 8.5987410  8.598741
## 2   2 3.2664226 11.865164
## 3   3 1.6953758 13.560539
## 4   4 1.5111487 15.071688
## 5   5 1.2189008 16.290589
## 6   6 0.5515784 16.842167
## 7   7 0.4920644 17.334232
## 8   8 0.4251927 17.759424
## 9   9 0.3846802 18.144105
## 10 10 0.3585930 18.502698
## 11 11 0.3043225 18.807020
## 12 12 0.2757073 19.082727
## 13 13 0.2632581 19.345985
## 14 14 0.2447057 19.590691
## 15 15 0.2379580 19.828649
## 16 16 0.2250354 20.053685
## 17 17 0.2206213 20.274306
## 18 18 0.2136148 20.487921
## 19 19 0.2006676 20.688588
## 20 20 0.1907987 20.879387
scPlot_bar <- pca.data[1:15,] %>%
  ggplot(aes(x=PC, y=percVar)) +
  geom_bar(stat='identity') +
  geom_hline(yintercept = 1, colour="red", linetype=3) +
  labs(title="Variance Explanation by PCA") +
  xlab("Principal Components") +
  ylab("Percentage of Explained Variance") +
  theme_bw()
scPlot_bar

#gridExtra::grid.arrange(scPlot_elbow, scPlot_bar, nrow = 2)
scPlot <- pca.data[1:15,] %>%
  ggplot(aes(x=PC, y=cumulVar)) +
  geom_bar(stat='identity') +
  geom_hline(yintercept = 80, colour="red", linetype=3) +
  labs(title="Cumulative Variance Explanation by PCA") +
  xlab("Principal Components") +
  ylab("Cumulative Percentage of Explained Variance") +
  theme_bw()
scPlot

# Heatmap of pc1 and pc2
DimHeatmap(ATAC_object, dims = 1:2, cells = 500)

DimHeatmap(ATAC_object, dims = 1:15, balanced = T)

TSNE

ATAC_object <- RunTSNE(ATAC_object, dims = 1:15)
DimPlot(ATAC_object, label=T, reduction = "tsne") + NoLegend()

# Clustering according to percentage mito
FeaturePlot(ATAC_object, features=c("percentage_mtDNA"), reduction = "tsne")

#UMAP

ATAC_object <- RunUMAP(ATAC_object, dims=1:15, verbose=F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
DimPlot(ATAC_object, label=T, reduction = "umap") + NoLegend()

# Mito check
FeaturePlot(ATAC_object, features=c("percentage_mtDNA"), reduction = "umap")

##Cluster Analysis

ATAC_object <- FindNeighbors(ATAC_object, dims=1:15)
## Computing nearest neighbor graph
## Computing SNN
ATAC_object <- FindClusters(ATAC_object, resolution = 0.125)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2613
## Number of edges: 99016
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9629
## Number of communities: 8
## Elapsed time: 0 seconds
DimPlot(ATAC_object, reduction = "tsne")

# UMAP clusters
DimPlot(ATAC_object, reduction="umap")

##Find Markers

ATAC_object <- JoinLayers(ATAC_object)
ATAC_object.markers = FindAllMarkers(ATAC_object, only.pos=TRUE,
                                 min.pct=0.25, logfc.threshold=0.25)
## Calculating cluster 0
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
#View(ATAC_object.markers)
topMarkers <- ATAC_object.markers %>%
  group_by(cluster) %>%
  top_n(n=5, wt=avg_log2FC)
DoHeatmap(ATAC_object, features = topMarkers$gene) + NoLegend()
## Warning in DoHeatmap(ATAC_object, features = topMarkers$gene): The following
## features were omitted as they were not found in the scale.data slot for the RNA
## assay: AC104370.1, FCRL6, CD28

##Annotation

ref = celldex::HumanPrimaryCellAtlasData()
ref
## class: SummarizedExperiment 
## dim: 19363 713 
## metadata(0):
## assays(1): logcounts
## rownames(19363): A1BG A1BG-AS1 ... ZZEF1 ZZZ3
## rowData names(0):
## colnames(713): GSM112490 GSM112491 ... GSM92233 GSM92234
## colData names(3): label.main label.fine label.ont
#ref1 = celldex::BlueprintEncodeData()
#ref1
my.ATAC = as.SingleCellExperiment(ATAC_object)
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
pred = SingleR(my.ATAC, ref=ref, labels=ref$label.main)
table(pred$labels)
## 
##           B_cell              GMP       HSC_-G-CSF         Monocyte 
##              257                3                1              991 
##          NK_cell Pro-B_cell_CD34+          T_cells 
##              128                2             1231

##Annotate

#Label Annotation 
ATAC_object$SingleR.labels <- pred$labels[match(rownames(ATAC_object@meta.data),
                                           rownames(pred))]

DimPlot(ATAC_object, group.by = "SingleR.labels", reduction = "tsne", label = TRUE) 

DimPlot(ATAC_object, group.by = "SingleR.labels", reduction = "umap", label = TRUE) 

Differential gene expression

# find all markers of cluster 1
cluster.markers <- FindAllMarkers(ATAC_object)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
head(cluster.markers, n = 5)
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster   gene
## PLXDC2     0   6.039091 0.982 0.039         0       0 PLXDC2
## SLC8A1     0   6.199935 0.955 0.032         0       0 SLC8A1
## LRMDA      0   5.771247 0.938 0.031         0       0  LRMDA
## FCN1       0   5.043945 0.966 0.082         0       0   FCN1
## TBXAS1     0   4.479807 0.921 0.070         0       0 TBXAS1
cluster.markers %>% group_by(cluster) %>% dplyr::filter(avg_log2FC > 1) 
## # A tibble: 10,821 × 7
## # Groups:   cluster [8]
##    p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
##    <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr> 
##  1     0       6.04 0.982 0.039         0 0       PLXDC2
##  2     0       6.20 0.955 0.032         0 0       SLC8A1
##  3     0       5.77 0.938 0.031         0 0       LRMDA 
##  4     0       5.04 0.966 0.082         0 0       FCN1  
##  5     0       4.48 0.921 0.07          0 0       TBXAS1
##  6     0       6.06 0.873 0.028         0 0       IRAK3 
##  7     0       6.00 0.871 0.026         0 0       DMXL2 
##  8     0       5.17 0.92  0.091         0 0       NAMPT 
##  9     0       5.09 0.875 0.049         0 0       RBM47 
## 10     0       4.96 0.873 0.051         0 0       MCTP1 
## # ℹ 10,811 more rows
# Markers for b cells
VlnPlot(ATAC_object, features = c("IGHA1","CRIP2", "ITGB1", "FCRL5", "FGR" ))

VlnPlot(ATAC_object, features = c("CD79A", "CD79B", "IL4R","IGKV3-11", "IGHV5-51", 
                             "IGLV3-19"))
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of IGHV5-51.
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of IGLV3-19.

# Markers for raw counts
VlnPlot(ATAC_object, features = c("MTRNR2L12", "RPS26"))

VlnPlot(ATAC_object, features = c("TCL1A", "SNX9"))

DotPlot(ATAC_object, features = topMarkers$gene[1:10]) + RotatedAxis()

p <- DotPlot(ATAC_object, features = topMarkers$gene, group.by = "SingleR.labels") + RotatedAxis()

p + coord_flip()

write.csv(ATAC_object@meta.data, "~/ATAC/3K_ATAC_OBJECT.csv")